
#################################################################
#
# iowa
#
#################################################################

# load data
# (insert correct path for your computer...)
#iowa <- read.csv("/Users/.../Street et al 2015 Iowa_regdata.csv")
head(iowa)

## first, predict outcomes in 2008 using relationship registrations ~ search in 2004

# Deadline in 2004 was Oct 23
# Deadline (mail) in 2008 was Oct 25

## Prepare to model
s04 <- iowa$X2004search[1:63]
s04 <- s04 - min(s04)
sz04 = log(s04+.01)

r04 <- iowa$X2004cnty[1:63]
r04[54:63] <- NA
d04 <- iowa$X2004deadline[1:63]

weekend_04 <- rep(c(0, 0, 0, 1, 1, 0, 0), 9) 
# plus labor day
weekend_04[6] <- 1

s08 <- iowa$X2008search[1:65]
s08 <- s08 - min(s08)
sz08 = log(s08+.01)

r08 <- iowa$X2008cnty[1:65]
r08[56:64] <- NA

# shift 2008 deadline to election day...
d08 <- c(rep(0,64), 1)

weekend_08 <- c(rep(c(0, 0, 0, 0, 0, 1, 1), 9), 0, 0)
# plus labor day
weekend_08[1] <- 1

##################
## Now model 2004
library(mgcv)

# many knots
fit <- gam(r04 ~ s(sz04,k=15,bs="cr") + weekend_04 + d04, family=negbin(c(1,20)))
summary(fit)
plot(fit)
gam.check(fit)

# predicted
p <- exp(predict(fit, newdata=data.frame(sz04=sz08, weekend_04=weekend_08, d04=d08)))
sum(p[c(1:55,65)])

# observed
sum(iowa$X2008cnty[c(1:55,65)])




#Credible Interval for Sum and Individual Days
G=5000 #Number of MC Draws
theta = fit$family$getTheta() # Parameter for NBin Dist, we treat it as fixed now
#Need mutlivariate normal distribution to draw model parameters that characterize the mean
#structure. These parameters are asymptotically/approximately multivariate normal.
rmvn <- function(n,mu,sig) { ## MVN random deviates
  L <- mroot(sig);m <- ncol(L);
  t(mu + L%*%matrix(rnorm(m*n),m,n)) 
}
    # G replicate param. vectors, we reuse these for each predicted state by day value
    B <- rmvn(G,coef(fit),fit$Vp) 

    #Convert data to form needed based on spline using predict()
    X = predict(fit,newdata = data.frame(sz04=sz08, weekend_04=weekend_08, d04=d08),type='lpmatrix')
    
    #Predicted Means at each set of parameter values
    samps = exp(X%*%t(B))

    #Predicted Registration Counts given parameter values
    preds = apply(samps,1,function(x) rnbinom(x,size=theta,mu=x))

    #Predicted Mean Registration Count, then lower 5% and upper 95% quantiles
    pred.reg = apply(preds,2,function(x) mean(x))
    pred.low = apply(preds,2,function(x) quantile(x,c(0.05)))
    pred.high = apply(preds,2,function(x) quantile(x,c(0.95)))

plot(pred.reg,type='l',ylim=c(0,150000),col='red')
lines(iowa$X2008county)
lines(pred.low,lty=2,col='red')
lines(pred.high,lty=2,col='red')

#Credible interval for sum:
tot.preds = rowSums(preds[,c(1:55,65)])
mean(tot.preds)
quantile(tot.preds,c(0.01, 0.02, 0.03, 0.04, 0.05,.5,.95))



################################################################################
# ok now predict 2012 outcomes with model based on the 2004 data...

## Prepare to model
s04 <- iowa$X2004search[1:63]
s04 <- s04 - min(s04)
sz04 = log(s04+.01)

r04 <- iowa$X2004cnty[1:63]
r04[54:63] <- NA
d04 <- iowa$X2004deadline[1:63]

weekend_04 <- rep(c(0, 0, 0, 1, 1, 0, 0), 9) 
# plus labor day
weekend_04[6] <- 1


s12 <- iowa$X2012search
s12 <- s12 - min(s12)
sz12 = log(s12+.01)

r12 <- iowa$X2012cnty
r12[58:66] <- NA
d12 <- c(rep(0,66), 1)

weekend_12 <- c(rep(c(1,1,0,0,0,0,0),9),c(1,1,0,0))
# plus labor day
weekend_12[3] <- 1


##################
## Now model 2004
library(mgcv)

# many knots
fit <- gam(r04 ~ s(sz04,k=15,bs="cr") + weekend_04 + d04, family=negbin(c(1,20)))
summary(fit)
plot(fit)
gam.check(fit)

# predicted
p <- exp(predict(fit, newdata=data.frame(sz04=sz12, weekend_04=weekend_12, d04=d12)))
sum(p[c(1:57,67)])

# observed
sum(iowa$X2012cnty[c(1:57,67)])


#Credible Interval for Sum and Individual Days
G=5000 #Number of MC Draws
theta = fit$family$getTheta() # Parameter for NBin Dist, we treat it as fixed now
#Need mutlivariate normal distribution to draw model parameters that characterize the mean
#structure. These parameters are asymptotically/approximately multivariate normal.
rmvn <- function(n,mu,sig) { ## MVN random deviates
  L <- mroot(sig);m <- ncol(L);
  t(mu + L%*%matrix(rnorm(m*n),m,n)) 
}
    # G replicate param. vectors, we reuse these for each predicted state by day value
    B <- rmvn(G,coef(fit),fit$Vp) 

    #Convert data to form needed based on spline using predict()
    X = predict(fit,newdata = data.frame(sz04=sz12, weekend_04=weekend_12, d04=d12),type='lpmatrix')
    
    #Predicted Means at each set of parameter values
    samps = exp(X%*%t(B))

    #Predicted Registration Counts given parameter values
    preds = apply(samps,1,function(x) rnbinom(x,size=theta,mu=x))

    #Predicted Mean Registration Count, then lower 5% and upper 95% quantiles
    pred.reg = apply(preds,2,function(x) mean(x))
    pred.low = apply(preds,2,function(x) quantile(x,c(0.05)))
    pred.high = apply(preds,2,function(x) quantile(x,c(0.95)))

plot(pred.reg,type='l',ylim=c(0,200000),col='red')
lines(iowa$X2012county)
lines(pred.low,lty=2,col='red')
lines(pred.high,lty=2,col='red')


#Credible interval for sum:
tot.preds = rowSums(preds[,c(1:57,67)])
mean(tot.preds)
quantile(tot.preds,c(0.05,0.5,0.6,0.7,0.8, 0.85, 0.9, .95))


##
## End
##